meta_dat <- read_rds("phc_meta_trends.rds")
completes_dat <- read_rds("phc_lucid_completes.rds")
ycls_dat <- read_rds("phc_replications.rds")
summary_dat <- read_rds("phc_summary.rds")

###--------Figure 1---------------
g <-
  ggplot(completes_dat, aes(y = completes, x = date)) +
  geom_smooth(
    color = "black",
    fill = "grey70",
    size = 0.6,
    alpha = 0.5
  ) +
  scale_x_date(
    expand = c(0.01, 0.01),
    date_breaks = "3 month",
    date_labels = "%b '%y"
  ) +
  scale_y_continuous(labels = scales::label_number_si()) +
  geom_point(
    size = 3.25,
    pch = 21,
    color = "white",
    fill = "black",
    alpha = 0.7
  ) +
  labs(x = "", y = "") +
  theme_fivethirtyeight() +
  theme(
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "lines"),
    legend.position = "none",
    strip.text = element_text(size = 10.28),
    strip.text.x = element_text(size = 10.28),
    panel.background = element_blank(),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.key = element_blank(),
    panel.grid.major.y = element_line(size = 0.3),
    panel.grid.major.x = element_line(size = 0.3),
    axis.text.x = element_text(size = 10),
    axis.title.x = element_text(size = 10)
  ) +
  geom_text_repel(
    data = (. %>% filter(completes > 100000)),
    aes(label = "Week before\n2020 election"),
    color = "black",
    min.segment.length = 0,
    box.padding = 0.5,
    nudge_y = 0,
    nudge_x = 80,
    direction = "y",
    point.padding = 0.5,
    size = 3.5,
    family = "serif"
  )

g

g %>%
  ggsave(filename = "manuscript_fig_1.pdf",
         .,
         width = 6.5,
         height = 4)

###--------Figure 2-3---------------
x <- summary_dat %>% filter(study == "Pre-COVID summary")
y <- summary_dat %>% filter(study == "YCLS summary")

dat <-
  left_join(x,
            y,
            by = c("study_group_detail"),
            suffix = c("_pre", "_ycls")) %>%
  select(-study_ycls,-study_group_ycls,-study_pre) %>%
  rename(study_group = study_group_pre) %>%
  ungroup() %>%
  mutate(
    estimate_diff = estimate_ycls - estimate_pre,
    se_diff = sqrt(std.error_ycls ^ 2 + std.error_pre ^ 2),
    p_diff = 2 * (1 - pnorm(abs(
      estimate_diff / se_diff
    ))),
    sign_diff = sign(estimate_ycls) != sign(estimate_pre),
    sig_diff = ifelse(p_diff < 0.05, "Significant", "Not Significant"),
    conjoint = ifelse(
      study_group == "Hainmueller & Hopkins (2015)",
      "Conjoint Estimates",
      "Non-Conjoint Estimates"
    )
  )

## Correspondence by study:
dat %>%
  filter(!sign_diff) %>%
  group_by(study_group) %>%
  summarise(avg = mean(estimate_ycls / estimate_pre)) %>%
  mutate_if(is.numeric, round, 2) %>%
  arrange(desc(avg))

## Overall summary of correspondence among replication estimates w/ same sign:
dat %>%
  filter(!sign_diff) %>%
  ungroup() %>%
  summarise(
    avg = mean(estimate_ycls / estimate_pre),
    inflated_aronow =  mean((estimate_ycls / 0.70) / estimate_pre)
  )

## Compare conjoint v. non-conjoint
dat %>%
  filter(!sign_diff) %>%
  ungroup() %>%
  group_by(conjoint) %>%
  summarise(
    avg = mean(estimate_ycls / estimate_pre),
    n = n(),
    inflated_aronow =  mean((estimate_ycls / 0.70) / estimate_pre)
  )


## How many significant diffs in non-conjoint?
dat %>%
  filter(study_group != "Hainmueller & Hopkins (2015)") %>%
  mutate(p_adjust = (p.adjust(p_diff, method = "fdr"))) %>% 
  group_by(sign_diff) %>%
  summarise(
    estimate_smaller = sum(estimate_diff < 0),
    estimate_larger = sum(estimate_diff > 0),
    sign_diff = sum(sign_diff),
    p_diff_unadjus = sum(p_diff < 0.05),
    p_diff_adjust = sum(p_adjust < 0.05)
  )

## Conjoint: 41 estimates in total.
dat %>%
  filter(study_group == "Hainmueller & Hopkins (2015)") %>%
  mutate(p_adjust = (p.adjust(p_diff, method = "fdr"))) %>% 
  group_by(estimate_smaller = (estimate_diff < 0)) %>%
  summarise(
    estimate_smaller = sum(estimate_diff < 0),
    estimate_larger = sum(estimate_diff > 0),
    sign_diff = sum(sign_diff),
    p_diff_unadjus = sum(p_diff < 0.05),
    p_diff_adjust = sum(p_adjust < 0.05)
  )



g1 <-
  dat %>%
  filter(study_group != "Hainmueller & Hopkins (2015)") %>%
  mutate(
    p_adjust = (p.adjust(p_diff, method = "fdr")),
    sig_diff_adj = ifelse(p_adjust < 0.05, "Significant", "Not Significant")
  ) %>% 
  ggplot(
    .,
    aes(
      x = estimate_pre,
      y = estimate_ycls,
      group = study_group,
      color = sig_diff,
      fill = sig_diff,
      shape = sig_diff_adj
    )
  ) +
  geom_abline(
    intercept = 0,
    slope = 1,
    size = 0.5,
    color = "black",
    alpha = 0.9,
    lty = 2
  ) +
  geom_vline(
    xintercept = 0,
    size = 0.5,
    color = "black",
    alpha = 0.9
  ) +
  geom_hline(
    yintercept = 0,
    size = 0.5,
    color = "black",
    alpha = 0.9
  ) +
  geom_rug(alpha = 1 / 2,
           color = "black",
           length = unit(0.015, "npc")) +
  geom_linerange(aes(xmin = conf.low_pre, xmax = conf.high_pre)) +
  geom_linerange(aes(ymin = conf.low_ycls, ymax = conf.high_ycls)) +
  scale_x_continuous(limits = c(-0.25, 2.1), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-0.25, 2.1), expand = c(0, 0)) +
  geom_point(size = 3,
             color = "white",
             alpha = 0.8) +
  scale_fill_grey("Difference-in-Difference: ",
                  start = 0.5,
                  end = 0) +
  scale_color_grey("Difference-in-Difference: ",
                   start = 0.5,
                   end = 0) +
  scale_shape_manual("Difference-in-Difference: ", values = c(21, 22)) +
  labs(x = "Pre-COVID summary estimate",
       y = "COVID-era summary estimate") +
  theme_ycls() +
  theme(
    legend.position = "bottom",
    legend.justification = "left",
    axis.ticks.y = element_line(size = 0.1,  color = "lightgrey"),
    axis.ticks.x = element_line(size = 0.1,  color = "lightgrey"),
    axis.text.y = element_text(size = 10),
    panel.grid.major = element_line(size = 0.1, color = "lightgrey"),
    legend.margin = margin(t = -0.05, unit = 'cm'),
    legend.box.margin = margin(t = -0.05, unit = "cm"),
    panel.background = element_blank(),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.key = element_blank(),
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "lines")
  ) 

g1


## NB: conjoint has 41 estimates; 28 estimates from non-conjoint
g2 <-
  dat %>%
  filter(study_group == "Hainmueller & Hopkins (2015)") %>%
  mutate(
    p_adjust = (p.adjust(p_diff, method = "fdr")),
    sig_diff_adj = ifelse(p_adjust < 0.05, "Significant", "Not Significant")
  ) %>% 
  ggplot(
    .,
    aes(
      x = estimate_pre,
      y = estimate_ycls,
      group = study_group,
      fill = sig_diff,
      color = sig_diff,
      shape = sig_diff_adj
    )
  ) +
  geom_abline(
    intercept = 0,
    slope = 1,
    size = 0.5,
    color = "black",
    alpha = 0.9,
    lty = 2
  ) +
  geom_vline(
    xintercept = 0,
    size = 0.5,
    color = "black",
    alpha = 0.9
  ) +
  geom_hline(
    yintercept = 0,
    size = 0.5,
    color = "black",
    alpha = 0.9
  ) +
  geom_rug(alpha = 1 / 2,
           color = "black",
           length = unit(0.015, "npc")) +
  geom_linerange(aes(xmin = conf.low_pre, xmax = conf.high_pre)) +
  geom_linerange(aes(ymin = conf.low_ycls, ymax = conf.high_ycls)) +
  scale_x_continuous(limits = c(-0.05, 0.3)) +
  scale_y_continuous(limits = c(-0.05, 0.3)) +
  geom_point(size = 3,
             color = "white",
             alpha = 0.8) +
  scale_fill_grey("Difference-in-Difference: ",
                  start = 0.5,
                  end = 0) +
  scale_color_grey("Difference-in-Difference: ",
                   start = 0.5,
                   end = 0) +
  scale_shape_manual("Difference-in-Difference: ", values = c(21, 22)) +
  labs(x = "Pre-COVID summary estimate",
       y = "COVID-era summary estimate") +
  theme_ycls() +
  theme(
    legend.position = "bottom",
    legend.justification = "left",
    axis.ticks.y = element_line(size = 0.1,  color = "lightgrey"),
    axis.ticks.x = element_line(size = 0.1,  color = "lightgrey"),
    axis.text.y = element_text(size = 10),
    panel.grid.major = element_line(size = 0.1, color = "lightgrey"),
    legend.margin = margin(t = -0.05, unit = 'cm'),
    legend.box.margin = margin(t = -0.05, unit = "cm"),
    panel.background = element_blank(),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.key = element_blank(),
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "lines")
  )


table(g1$data$estimate_diff < 0, g1$data$sig_diff)
table(g1$data$estimate_diff < 0, g1$data$sig_diff_adj)


g1 %>%
  ggsave(filename = "manuscript_fig_2.pdf",
         .,
         width = 6,
         height = 4.5)

table(g2$data$estimate_diff < 0, g2$data$sig_diff)
table(g2$data$estimate_diff < 0, g2$data$sig_diff_adj)

g2 %>%
  ggsave(filename = "manuscript_fig_3.pdf",
         .,
         width = 6,
         height = 5)


###--------Figure 4---------------
g <-
  ggplot(meta_dat,
         aes(
           x = time,
           weight = 1 / (std.error ^ 2),
           y = est
         )) +
  scale_x_date(date_labels = "%b '%y") +
  geom_smooth(
    color = "black",
    fill = "grey70",
    size = 0.5,
    alpha = 0.25
  ) +
  geom_point(
    aes(size = as.numeric(size_group),
        fill = pre_covid),
    pch = 21,
    color = "white",
    alpha = 0.8,
    position = position_dodge2(width = 1.5, preserve = "single")
  ) +
  facet_row(~ name) +
  scale_color_grey("Pre-COVID sample: ", start = 0, end = 0.4) +
  scale_fill_grey("Pre-COVID sample: ", start = 0, end = 0.4) +
  labs(x = "",
       y = "",
       size = "Sample size:") +
  scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
  scale_size(
    range = c(2, 4),
    labels = c("< 1,000", "1,000-2,000",
               "2,000-3,000", "3,000+")
  ) +
  theme_ycls() +
  theme(
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "lines"),
    legend.position = "bottom",
    legend.justification = "left",
    axis.ticks.y = element_line(size = 0.1,  color = "lightgrey"),
    axis.ticks.x = element_line(size = 0.1,  color = "lightgrey"),
    axis.text.y = element_text(size = 10),
    panel.grid.major = element_line(size = 0.1, color = "lightgrey"),
    legend.margin = margin(t = -0.3, unit = 'cm'),
    legend.box.margin = margin(t = -0.3, unit = "cm"),
    panel.background = element_blank(),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.key = element_blank(),
    legend.box.just = "left",
    legend.box = "vertical"
  ) +
  guides(fill = guide_legend(override.aes = list(size = 3)),
         size = guide_legend(override.aes = list(color = "black")))

g %>%
  ggsave(filename = "manuscript_fig_4.pdf",
         .,
         width = 6.5,
         height = 3.5)

###--------Table 2---------------

## Duration: browsers v. web-applications
ycls_dat %>%
  group_by(admin_browser) %>%
  do(tidy(lm_robust(admin_duration_minutes ~ 1,
                    data = .)))

## Duration: non-mobiles v. mobiles
ycls_dat %>%
  group_by(admin_nonmobile) %>%
  do(tidy(lm_robust(admin_duration_minutes ~ 1,
                    data = .)))

## Pass rates for hard ACQ: browsers v. web-applications
app_hard_pass <-
  ycls_dat %>%
  group_by(admin_browser) %>%
  do(tidy(lm_robust(peyton_pass_acq ~ 1,
                    data = .))) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = "Hard",
         type = "Web-App")

## Pass rates for easy ACQ: browsers v. web-applications
app_easy_pass <-
  ycls_dat %>%
  group_by(admin_browser) %>%
  do(tidy(lm_robust(huber_acq_easy ~ 1,
                    data = .))) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = "Easy",
         type = "Web-App")

## Pass rates for medium ACQ: browsers v. web-applications
app_medium_pass <-
  ycls_dat %>%
  group_by(admin_browser) %>%
  do(tidy(lm_robust(huber_acq_hard ~ 1,
                    data = .))) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = "Medium",
         type = "Web-App")

## Pass rates for hard ACQ: non-mobiles v. mobiles
mobile_hard_pass <-
  ycls_dat %>%
  group_by(admin_nonmobile) %>%
  do(tidy(lm_robust(peyton_pass_acq ~ 1,
                    data = .))) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = "Hard",
         type = "Mobile")

## Pass rates for easy ACQ: non-mobiles v. mobiles
mobile_easy_pass <-
  ycls_dat %>%
  group_by(admin_nonmobile) %>%
  do(tidy(lm_robust(huber_acq_easy ~ 1,
                    data = .))) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = "Easy",
         type = "Mobile")

## Pass rates for medium ACQ: non-mobiles v. mobiles
mobile_medium_pass <-
  ycls_dat %>%
  group_by(admin_nonmobile) %>%
  do(tidy(lm_robust(huber_acq_hard ~ 1,
                    data = .))) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = "Medium",
         type = "Mobile")

## Combine estimates and generate table
mobile_acq_means <-
  bind_rows(mobile_easy_pass, mobile_medium_pass, mobile_hard_pass) %>%
  ungroup() %>%
  mutate(
    device = ifelse(admin_nonmobile, "Non-Mobile", "Mobile"),
    device = factor(device, levels = c("Non-Mobile", "Mobile"))
  ) %>%
  select(estimate, std.error, level, device) %>%
  mutate(entry = table_entry(est = estimate, se = std.error, digits = 2)) %>%
  select(level, device, entry) %>%
  arrange(device) %>%
  pivot_wider(names_from = c(device),
              values_from = c(entry))

app_acq_means <-
  bind_rows(app_easy_pass, app_medium_pass, app_hard_pass) %>%
  ungroup() %>%
  mutate(device = ifelse(admin_browser, "Browser", "Web-App")) %>%
  select(estimate, std.error, level, device) %>%
  mutate(entry = table_entry(est = estimate, se = std.error, digits = 2)) %>%
  select(level, device, entry) %>%
  arrange(device) %>%
  pivot_wider(names_from = c(device),
              values_from = c(entry))

mobile_acq_diffs <-
  bind_rows(mobile_easy_pass, mobile_medium_pass, mobile_hard_pass) %>%
  ungroup() %>%
  mutate(
    device = ifelse(admin_nonmobile, "Non-Mobile", "Mobile"),
    device = factor(device, levels = c("Non-Mobile", "Mobile"))
  ) %>%
  select(estimate, std.error, level, device) %>%
  pivot_wider(names_from = c(device),
              values_from = c(estimate, std.error)) %>%
  mutate(
    diff =  `estimate_Non-Mobile` - `estimate_Mobile`,
    se_diff = sqrt(`std.error_Non-Mobile` ^ 2 + `std.error_Mobile` ^
                     2),
    p_diff = 2 * (1 - pnorm(abs(diff / se_diff)))
  ) %>%
  select(level, diff, se_diff, p_diff) %>%
  mutate(entry = make_entry(
    est = diff,
    se = se_diff,
    p = p_diff,
    digits = 2
  )) %>%
  select(level, entry)

app_acq_diffs <-
  bind_rows(app_easy_pass, app_medium_pass, app_hard_pass) %>%
  ungroup() %>%
  mutate(device = ifelse(admin_browser, "Browser", "Web-App")) %>%
  select(estimate, std.error, level, device) %>%
  pivot_wider(names_from = c(device),
              values_from = c(estimate, std.error)) %>%
  mutate(
    diff = `estimate_Browser` - `estimate_Web-App`,
    se_diff = sqrt(`std.error_Browser` ^ 2 + `std.error_Web-App` ^
                     2),
    p_diff = 2 * (1 - pnorm(abs(diff / se_diff)))
  ) %>%
  select(level, diff, se_diff, p_diff) %>%
  mutate(entry = make_entry(
    est = diff,
    se = se_diff,
    p = p_diff,
    digits = 2
  )) %>%
  select(level, entry)

## Export table of output
app_acq_summary <-
  left_join(app_acq_means, app_acq_diffs)

mobile_acq_summary <-
  left_join(mobile_acq_means, mobile_acq_diffs)

acq_summary <-
  left_join(app_acq_summary, mobile_acq_summary, by = "level")

xtable(acq_summary) %>%
  print(
    include.rownames = FALSE,
    include.colnames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    type = "latex",
    file = paste0("manuscript_table_2.tex")
  )

###--------Figure 5---------------
peyton_original <-
  read_csv("peyton_original.csv") %>%
  mutate(
    Z_trust = factor(
      Z,
      levels = c("Corrupt", "Placebo", "Honest"),
      labels = c("Corrupt", "Control", "Honest")
    ),
    Z_trust_n = case_when(
      Z_trust == "Corrupt" ~ 0,
      Z_trust == "Control" ~ 0.5,
      Z_trust == "Honest" ~ 1
    ),
    D = glass_delta(Y = fmptrust_index, Z = Z_trust,
                    level = "Control"),
    Y = glass_delta(Y = fedspend_index, Z = Z_trust,
                    level = "Control")
  ) %>%
  filter(study_type == "Politics")

peyton_rep <-
  ycls_dat %>%
  filter(str_detect(week, "Week 9")) %>%
  mutate(
    Z_trust = factor(
      peyton_Z,
      levels = c("corrupt", "placebo",
                 "honest"),
      labels = c("Corrupt", "Control", "Honest")
    ),
    Z_trust_n = case_when(
      Z_trust == "Corrupt" ~ 0,
      Z_trust == "Control" ~ 0.5,
      Z_trust == "Honest" ~ 1
    ),
    D = glass_delta(Y = peyton_trust_index, Z = Z_trust,
                    level = "Control"),
    Y = glass_delta(Y = peyton_fedspend_index, Z = Z_trust,
                    level = "Control")
  )

## Combine datasets and replication sub-groups
all_data_sets <-
  bind_rows(
    `MTurk sample, Jun 2014` = peyton_original %>% filter(experiment == "MTurk_Pols14"),
    `Qualtrics sample, Sep 2014` = peyton_original %>% filter(experiment == "GenPop_Pols14"),
    `MTurk sample, Mar 2017` = peyton_original %>% filter(experiment == "MTurk_Pols17"),
    `Full Sample` = peyton_rep,
    `Attentive: Passed ACQ` = peyton_rep %>% filter(peyton_pass_acq == 1),
    `Inattentive: Failed ACQ` = peyton_rep %>% filter(peyton_pass_acq == 0),
    `Attentive: Internet browser` = peyton_rep %>% filter(admin_browser == 1),
    `Inattentive: Web-Application` = peyton_rep %>% filter(admin_browser == 0),
    `Attentive: Non-mobile device` = peyton_rep %>% filter(admin_nonmobile == 1),
    `Inattentive: Mobile device` = peyton_rep %>% filter(admin_nonmobile == 0),
    .id = "dataset"
  ) %>%
  filter(!is.na(Z_trust_n))

## Compare ACQ pass rates:
mean(peyton_original$attention_math, na.rm = TRUE)
mean(peyton_rep$peyton_pass_acq, na.rm = TRUE)

## Estimate first stage fx of Z on D for each dataset/sub-group
gg_df <-
  all_data_sets %>%
  group_by(dataset) %>%
  do(tidy(lm_robust(D ~ Z_trust_n,
                    data = .))) %>%
  filter(term != "(Intercept)") %>%
  ungroup() %>%
  add_row(.,
          dataset = c("Original experiments:",
                      "Lucid replication:")) %>%
  mutate(
    study = ifelse(str_detect(paste(dataset), "MTurk|Qualtrics"), "Original", "Lucid"),
    study = factor(
      study,
      levels = c("Original", "Lucid"),
      labels = c("Original study:",
                 "Lucid replication:")
    ),
    dataset = factor(dataset,
                     rev(
                       c(
                         "Original experiments:",
                         "MTurk sample, Jun 2014",
                         "Qualtrics sample, Sep 2014",
                         "MTurk sample, Mar 2017",
                         "Lucid replication:",
                         "Full Sample",
                         "Attentive: Passed ACQ",
                         "Inattentive: Failed ACQ",
                         "Attentive: Internet browser",
                         "Inattentive: Web-Application",
                         "Attentive: Non-mobile device",
                         "Inattentive: Mobile device"
                       )
                     ))
  )

g <-
  ggplot(gg_df,
         aes(
           x = estimate,
           y = dataset,
           fill = study,
           shape = study
         )) +
  geom_vline(xintercept = 0,
             lty = 2,
             size = 0.5) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high, color = study),
    height = 0,
    position = position_dodgev(height = 0.5),
    size = 1.25,
    na.rm = TRUE
  ) +
  geom_point(
    position = position_dodgev(height = 0.5),
    size = 3.25,
    color = "white",
    alpha = 0.8,
    na.rm = TRUE
  ) +
  scale_fill_grey("", start = 0.5, end = 0) +
  scale_color_grey("", start = 0.5, end = 0) +
  scale_shape_manual("", values = c(21, 22)) +
  xlab("Average causal effect estimate (standard units)") +
  ylab("") +
  theme_ycls() +
  theme(
    axis.line.x = element_line(size = 0.5),
    strip.text.x = element_text(hjust = 0),
    axis.text.y = element_text(
      face = c(rep('plain', 7), 'bold', 'plain', 'plain', 'plain', 'bold'),
      size = c(rep(10, 7), 11, 10, 10, 10, 11)
    )
  ) +
  coord_capped_cart(bottom = "none")

g

fixed <- 1
spacing <- .285

g %>%
  ggsave(
    filename = "manuscript_fig_5.pdf",
    .,
    width = 6.5,
    height = fixed + spacing * nrow(g$data)
  )
